library(tidyverse)
library(janitor)
library(here)
library(hrbrthemes)
library(skimr)
library(kableExtra)
library(DT)
library(outbreaks)
library(linelist)
library(earlyR)
library(i2extras)
library(projections)
library(epicontacts)
theme_set(theme_minimal())

EVD Simulated Outbreak

Data Cleaning

evd_linelist_raw <- here::here("data", "phm_evd_linelist_2017-10-27.xlsx") %>%
  rio::import()

evd_contacts_raw <-  here::here("data", "phm_evd_contacts_2017-10-27.xlsx") %>%
  rio::import()

cleaning_rules <- here::here("data", "phm_evd_cleaning_rules.xlsx") %>%
  rio::import()
evd_linelist_raw
## data.frame [12, 6] 
## id            chr 39e9dc 664549 B4D8AA 51883d 947e40 9aa197
## Date of Onset chr 43018 43024 43025 “18// 10/2017” 43028 43028
## Date.Report.  chr 43030 43032 “23/10/2017” 22-10-2017 “2017/10/25” “2017-10-23”
## SEX.          chr Female Male male male f f
## Âge           dbl 62 28 54 57 23 66
## location      chr Pseudopolis peudopolis Ankh-Morpork PSEUDOPOLIS Ankh Morpork AM
evd_contacts_raw
## data.frame [11, 2] 
## Source Case #ID chr 51883d b4d8aa 39e9dc 39E9DC 51883d 39e9dc
## case.id         chr 185911 e4b0a2 b4d8aa 601d2e 9AA197 51883d
cleaning_rules
## data.frame [6, 3] 
## bad_spelling  chr f m .missing am peudopolis .missing
## good_spelling chr female male unknown ankh_morpork pseudopolis unknown
## variable      chr sex sex sex location location location
evd_linelist <- evd_linelist_raw %>%
  linelist::clean_data(wordlist = cleaning_rules) %>%
  mutate(across(contains("date"), guess_dates))

evd_linelist
## data.frame [12, 6] 
## id            chr  39e9dc 664549 b4d8aa 51883d 947e40 9aa197
## date_of_onset date 2017-10-10 2017-10-16 2017-10-17 2017-10-18 2017-10-20 2017-10-20
## date_report   date 2017-10-22 2017-10-24 2017-10-23 2017-10-22 2017-10-25 2017-10-23
## sex           chr  female male male male female female
## age           dbl  62 28 54 57 23 66
## location      chr  pseudopolis pseudopolis ankh_morpork pseudopolis ankh_morpork ankh_morpork
evd_contacts <- evd_contacts_raw %>%
  linelist::clean_data()

evd_contacts
## data.frame [11, 2] 
## source_case_id chr 51883d b4d8aa 39e9dc 39e9dc 51883d 39e9dc
## case_id        chr 185911 e4b0a2 b4d8aa 601d2e 9aa197 51883d

Examine transmission chains

evd_chains <- make_epicontacts(
  linelist = evd_linelist,
  contacts = evd_contacts,
  id = "id",
  from = "source_case_id",
  to = "case_id",
  directed = TRUE
)
plot(
  evd_chains,
  node_shape = "sex",
  node_color = "location",
  shapes = c(male = "male", female = "female", unknown = "question-circle")
)

Examining incidence curves

evd_incidence <- incidence::incidence(
  evd_linelist$date_of_onset,
  last_date = "2017-10-27"
  )

plot(evd_incidence, show_cases = TRUE)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

Estimating the growth rate (r)

evd_growth <- evd_incidence %>%
  incidence::fit()
## Warning in incidence::fit(.): 10 dates with incidence of 0 ignored for fitting
plot(evd_incidence, show_cases = TRUE, fit = evd_growth)
## the argument `show_cases` requires the argument `stack = TRUE`
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

Estimating the reproduction number (R)

evd_reproduction <- get_R(
  evd_incidence,
  si_mean = 15.3,
  si_sd = 9.3
)
plot(evd_reproduction) +
  geom_vline(xintercept = 1, color = "#af3939")

plot(evd_reproduction, "lambdas")
## Warning: Removed 1 rows containing missing values (position_stack).

Short term forecasting

evd_projection <- project(
  evd_incidence,
  R = sample_R(evd_reproduction, 5000),
  si = evd_reproduction$si,
  n_sim = 5000,
  n_days = 14,
  R_fix_within = TRUE
)
plot(evd_incidence) %>%
  add_projections(evd_projection, quantiles = c(0.1, 0.25, 0.5, 0.75, 0.9)) +
  scale_x_date() +
  labs(
    x = "Date of symptom onset"
  )
## Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please use `guide = "none"` instead.

apply(evd_projection, 1, summary)
##         2017-10-28 2017-10-29 2017-10-30 2017-10-31 2017-11-01 2017-11-02 2017-11-03 2017-11-04 2017-11-05 2017-11-06 2017-11-07 2017-11-08 2017-11-09 2017-11-10
## Min.        0.0000     0.0000     0.0000      0.000     0.0000     0.0000      0.000     0.0000     0.0000     0.0000     0.0000     0.0000      0.000     0.0000
## 1st Qu.     1.0000     1.0000     1.0000      1.000     1.0000     1.0000      2.000     2.0000     2.0000     2.0000     2.0000     3.0000      3.000     3.0000
## Median      2.0000     2.0000     2.0000      2.000     2.0000     3.0000      3.000     3.0000     3.0000     4.0000     4.0000     5.0000      5.000     6.0000
## Mean        1.9966     2.1304     2.2196      2.439     2.6556     2.9562      3.296     3.6848     4.1086     4.6674     5.3146     5.9666      6.811     7.7592
## 3rd Qu.     3.0000     3.0000     3.0000      3.000     4.0000     4.0000      5.000     5.0000     6.0000     6.0000     7.0000     8.0000      9.000    10.0000
## Max.       11.0000    11.0000    11.0000     14.000    16.0000    15.0000     16.000    21.0000    21.0000    28.0000    37.0000    41.0000     53.000    64.0000
apply(evd_projection, 1, function(x) mean(x > 0))
## 2017-10-28 2017-10-29 2017-10-30 2017-10-31 2017-11-01 2017-11-02 2017-11-03 2017-11-04 2017-11-05 2017-11-06 2017-11-07 2017-11-08 2017-11-09 2017-11-10 
##     0.8386     0.8624     0.8690     0.8814     0.8976     0.9154     0.9252     0.9340     0.9390     0.9590     0.9656     0.9620     0.9742     0.9778

UK COVID-19

Data Cleaning

covid_eng <- here::here("data", "covid_admissions_uk_2020_10_24.xlsx") %>%
  rio::import() %>%
  tibble() %>%
  mutate(date = lubridate::as_date(date))

covid_eng
## tibble [31614, 5] 
## date     date 2020-03-20 2020-03-20 2020-03-20 2020-03-20 2020-03-20 2020-03-20
## region   chr  East of England East of England East of England East of England East of England East ~
## org_name chr  Southend University Hospital NHS Foundation Trust Bedford Hospital NHS Trust Luton an~
## org_code chr  RAJ RC1 RC9 RCX RD8 RDD
## n        dbl  0 2 0 1 0 0
covid_eng %>%
  group_by(region) %>%
  summarise(cases = sum(n))
## tibble [7, 2] 
## region chr East of England London Midlands North East and Yorkshire North West South East
## cases  dbl 10699 25689 24199 21556 25041 14335
covid_eng %>%
  group_by(region, org_code) %>%
  summarise(cases = sum(n)) %>%
  ggplot(aes(x = cases)) +
  geom_histogram() +
  facet_wrap(region ~.) +
  labs(
    title = "COVID-19 admissions by NHS region",
    x = "Number of COVID-19 cases in a single trust",
    y = "Frequency"
  )
## `summarise()` has grouped output by 'region'. You can override using the `.groups` argument.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Epidemic curves

covid_incidence <- incidence2::incidence(
  covid_eng,
  date,
  interval = "week",
  groups = region,
  count = n
  )
summary(covid_incidence)
## date range: [2020-W12] to [2020-W43]
## n: 127806
## interval: 1 (Monday) week 
## cumulative: FALSE
## timespan: 224 days
## 
## 1 grouped variable
## 
##   region                       n
##   <chr>                    <dbl>
## 1 East of England          10699
## 2 London                   25689
## 3 Midlands                 24199
## 4 North East and Yorkshire 21556
## 5 North West               25041
## 6 South East               14335
## 7 South West                6287
covid_incidence %>%
  plot(fill = region, col_pal = muted, angle = 45, legend = "bottom") +
  labs(title = "Weekly incidence of COVID-19 hospital admissions in England")

covid_incidence %>%
  facet_plot(col_pal = muted, angle = 45, date_format = "%d%m%Y") +
  labs(
    title = "Weekly incidence of COVID-19 hospital admissions in England by region"
    )

Growth rates

covid_incidence_recent <- incidence2::incidence(
  covid_eng,
  date,
  groups = region,
  count = n
  ) %>%
  keep_last(4 * 7) %>%
  keep_first(3 * 7)
summary(covid_incidence_recent)
## date range: [2020-09-27] to [2020-10-17]
## n: 9788
## interval: 1 day
## cumulative: FALSE
## timespan: 21 days
## 
## 1 grouped variable
## 
##   region                       n
##   <chr>                    <dbl>
## 1 East of England            405
## 2 London                     931
## 3 Midlands                  1688
## 4 North East and Yorkshire  2617
## 5 North West                3224
## 6 South East                 534
## 7 South West                 389
facet_plot(covid_incidence_recent)

covid_negbin_fit <- covid_incidence_recent %>%
  i2extras::fit_curve(model = "negbin")
plot(covid_negbin_fit)

covid_negbin_growth <- covid_negbin_fit %>%
  i2extras::growth_rate(growth_decay_time = TRUE)

covid_negbin_growth
## tibble [6, 10] 
## count_variable  chr n n n n n n
## region          chr East of England Midlands North East and Yorkshire North West South East South W~
## model           lst negbin [29, 1] negbin [29, 1] negbin [29, 1] negbin [29, 1] negbin [29, 1] negb~
## r               dbl 0.048889 0.053999 0.049356 0.061524 0.072597 0.097432
## r_lower         dbl 0.031449 0.042792 0.042414 0.051771 0.055169 0.07642
## r_upper         dbl 0.06654 0.065281 0.056332 0.071329 0.090316 0.118985
## growth_or_decay chr doubling doubling doubling doubling doubling doubling
## time            dbl 14.178045 12.836348 14.043732 11.266349 9.54785 7.114162
## time_lower      dbl 10.416959 10.617951 12.30475 9.717624 7.674658 5.825487
## time_upper      dbl 22.040373 16.197916 16.342247 13.388677 12.564161 9.0702
min_date <- min(covid_incidence_recent$date_index)
max_date <- max(covid_incidence_recent$date_index)

# covid_incidence_recent %>%
#   incidence2::get_dates() %>%
#   min()

covid_negbin_growth %>%
  ggplot(aes(x = r, y = fct_reorder(region, r))) +
  geom_errorbar(aes(xmin = r_lower, xmax = r_upper), width = 0.2) +
  geom_point(color = "#7c1073", size = 3) +
  geom_vline(aes(xintercept = 0), linetype = 2, color = "#3b3b3b") +
  labs(
    title = "Estimated daily growth rate of COVID-19 in England by region",
    subtitle = glue::glue(
      "based on hospital admissions, {min_date} - {max_date}"
      )
  )

covid_negbin_growth %>%
  ggplot(aes(x = time, y = fct_reorder(region, time))) +
  geom_errorbar(aes(xmin = time_lower, xmax = time_upper), width = 0.2) +
  geom_point(color = "#7c1073", size = 3) +
  geom_vline(aes(xintercept = 0), linetype = 2, color = "#3b3b3b") +
  labs(
    title = "Estimated doubling time of COVID-19 in England by region",
    subtitle = glue::glue(
      "based on hospital admissions, {min_date} - {max_date}"
      )
  )